Some housekeeping (again), installing necessary packages.
# Install whichever of the required packages are not yet available locally.
list.of.packages <- c("igraph", "tidygraph", "ggraph")
new.packages <- setdiff(list.of.packages, installed.packages()[, "Package"])
if (length(new.packages) > 0) {
  install.packages(new.packages)
}
# Drop the helper vectors so they do not linger in the workspace.
rm(list.of.packages, new.packages)
So, briefly back to the questions: Why all the fuss with graph objects and the like?
# Start from an empty workspace, then load the publication data.
# NOTE: rm(list = ls()) wipes every object in the calling environment.
rm(list = ls())
articles <- readRDS("../input/biblio/publications.RDS")

# Keep only the fields used below and give the two-letter field tags
# readable names (select() renames while selecting).
articles <- articles %>%
  select(article = SR,
         author = AU,
         title = TI,
         journal = JI,
         year = PY,
         affiliation = AU_UN,
         keywords = DE,
         citations = TC,
         references = NR,
         reference.list = CR)

# Show the 20 articles with the highest citation count.
head(arrange(articles, desc(citations)), 20)

# Inspect the raw reference string of the first article.
articles[1, "reference.list"]
# Split every ";"-separated reference string into one column per reference.
# The column count is the largest value of `references` in the data.
citation.el <- data.table(
  article = articles$article,
  str_split_fixed(articles$reference.list,
                  ";",
                  max(articles$references, na.rm = TRUE))
)

# Reshape to long form (one row per article-reference pair), drop the
# column-index variable created by melt() and discard empty padding cells.
citation.el <- melt(citation.el, id.vars = "article")
citation.el <- citation.el[, variable := NULL][value != ""]

# Name the value column and sort the edge list.
citation.el <- citation.el %>%
  rename(reference = value) %>%
  arrange(article, reference)

head(citation.el)
library(Matrix)

# Encode articles and references as factors once: the integer codes serve as
# row/column indices and the levels as dimnames. (The original recomputed
# factor() for every use.)
art.f <- factor(citation.el$article)
ref.f <- factor(citation.el$reference)

# Sparse article x reference incidence matrix with one entry of 1 per row of
# the edge list. The entry count is taken from nrow(); the original coerced
# the character `article` column with as.numeric() just to measure its
# length, which raised a spurious "NAs introduced by coercion" warning.
mat <- spMatrix(nrow = nlevels(art.f),
                ncol = nlevels(ref.f),
                i = as.integer(art.f),
                j = as.integer(ref.f),
                x = rep(1, nrow(citation.el)))
row.names(mat) <- levels(art.f)
colnames(mat) <- levels(ref.f)

# Remove the helper factors again; only `mat` is needed downstream.
rm(art.f, ref.f)

str(mat)
## Formal class 'dgTMatrix' [package "Matrix"] with 6 slots
## ..@ i : int [1:244252] 0 0 0 0 0 0 0 0 0 0 ...
## ..@ j : int [1:244252] 10526 14911 14934 15002 15291 17906 19745 20899 23183 23860 ...
## ..@ Dim : int [1:2] 6370 36611
## ..@ Dimnames:List of 2
## .. ..$ : chr [1:6370] "(HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG" "AARSTAD J, 2016, RES POLICY" "ABDI M, 2012, J INT BUS STUD" "ABDIH Y, 2006, IMF STAFF PAP" ...
## .. ..$ : chr [1:36611] "A D 1994 POSTBUREAUCRATIC ORG" "A W 1998 MANAGING TOTAL QUALI" "AAGE T 2004 DAN RES UN IND DYN D" "AAGE T 2006 THESIS COPENHAGEN BU" ...
## ..@ x : num [1:244252] 1 1 1 1 1 1 1 1 1 1 ...
## ..@ factors : list()
# Article x article co-occurrence matrix (mat %*% t(mat)): how strongly two
# articles overlap in the references they cite.
mat.art <- tcrossprod(mat)
# Reference x reference co-occurrence matrix (t(mat) %*% mat): how often two
# references appear together in the same article.
mat.ref <- crossprod(mat)
# The incidence matrix itself is no longer needed.
rm(mat)
# library() stops immediately if igraph is missing; require() would only
# return FALSE and let the script fail later with a less obvious error.
library(igraph)

# Build an undirected, weighted article network from the article x article
# matrix; diagonal entries are ignored.
g <- graph_from_adjacency_matrix(mat.art,
                                 mode = "undirected",
                                 weighted = TRUE,
                                 diag = FALSE)
rm(mat.art)

# Collapse any parallel edges (summing their attributes) and drop self-loops.
g <- simplify(g,
              remove.multiple = TRUE,
              remove.loops = TRUE,
              edge.attr.comb = "sum")

# Attach article metadata to the vertices. match() yields exactly one row
# index per vertex (NA if an article is not found), so the attribute vectors
# always have vcount(g) elements in vertex order -- a join could return extra
# rows if `articles` contained duplicate keys.
art.idx <- match(V(g)$name, articles$article)
g <- set_vertex_attr(g, "year", value = articles$year[art.idx])
g <- set_vertex_attr(g, "citations", value = articles$citations[art.idx])
g <- set_vertex_attr(g, "references", value = articles$references[art.idx])
rm(art.idx)

g
## IGRAPH a5afff4 UNW- 6370 3801377 --
## + attr: name (v/c), year (v/n), citations (v/n), references (v/n),
## | weight (e/n)
## + edges from a5afff4 (vertex names):
## [1] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ABDI M, 2012, J INT BUS STUD
## [2] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ABEBE GK, 2013, AGRIC SYST
## [3] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ACS ZJ, 2014, RES POLICY
## [4] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ADJEI-NSIAH S, 2016, CAH AGRIC
## [5] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ADNER R, 2001, MANAGE SCI
## + ... omitted several edges
# Keep the raw co-occurrence count before the weight is normalised.
E(g)$weight.count <- E(g)$weight

# Jaccard-style normalisation: shared / (refs_1 + refs_2 - shared), where
# refs_1 and refs_2 are the `references` attribute (number of references) of
# the two endpoint articles -- not their degree in the graph.
# The endpoint matrix is computed once and reused for both columns.
el <- ends(g, E(g), names = FALSE)
n.refs <- V(g)$references
i <- n.refs[el[, 1]]  # reference count of the first endpoint of every edge
j <- n.refs[el[, 2]]  # reference count of the second endpoint of every edge
E(g)$weight <- E(g)$weight.count / (i + j - E(g)$weight.count)
rm(el, n.refs, i, j)

# Prune the network: drop the weakest 10% of edges, then vertices left
# without any edge weight, then the bottom quartile of vertices by strength.
g <- delete_edges(g, E(g)[weight < quantile(weight, 0.1, na.rm = TRUE)])
g <- delete_vertices(g, strength(g) == 0)
g <- delete_vertices(g, strength(g) < quantile(strength(g), 0.25, na.rm = TRUE))
g
## IGRAPH e26e4c8 UNW- 4777 3015453 --
## + attr: name (v/c), year (v/n), citations (v/n), references (v/n),
## | weight (e/n), weight.count (e/n)
## + edges from e26e4c8 (vertex names):
## [1] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ACS ZJ, 2014, RES POLICY
## [2] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ADJEI-NSIAH S, 2016, CAH AGRIC
## [3] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ADNER R, 2001, MANAGE SCI
## [4] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ADNER R, 2002, STRATEG MANAGE J
## [5] (HANS) DE HAAN J, 2011, TECHNOL FORECAST SOC CHANG--ADNER R, 2016, STRATEG MANAGE J
## + ... omitted several edges
bibliometrix package
rm(list = ls())
# library() fails right here if bibliometrix is not installed; require()
# would only return FALSE and defer the failure to the first function call.
library(bibliometrix)
# Open the package help page (interactive use).
?bibliometrix

# Read the plain-text export and convert it into a bibliographic data frame.
# NOTE(review): newer bibliometrix releases appear to take the file path
# directly in convert2df() instead of going through readFiles() -- confirm
# against the installed version.
M <- convert2df(readFiles("../input/biblio/biblio_nw1.txt"),
                dbsource = "isi",
                format = "plaintext")
##
## Converting your isi collection into a bibliographic dataframe
##
## Articles extracted 100
## Articles extracted 200
## Articles extracted 300
## Articles extracted 400
## Articles extracted 500
## Done!
##
##
## Generating affiliation field tag AU_UN from C1: Done!
# Compute the descriptive bibliometric statistics for the collection and
# print the summary tables (top 20 rows each, no pause between tables).
results <- biblioAnalysis(M)
summary(results, k = 20, pause = FALSE, width = 130)
##
##
## Main Information about data
##
## Documents 500
## Sources (Journals, Books, etc.) 268
## Keywords Plus (ID) 2490
## Author's Keywords (DE) 1206
## Period 2008 - 2016
## Average citations per documents 150.6
##
## Authors 3562
## Author Appearances 3889
## Authors of single authored documents 17
## Authors of multi authored documents 3545
##
## Documents per Author 0.14
## Authors per Document 7.12
## Co-Authors per Documents 7.78
## Collaboration Index 7.51
##
## Document types
## J 496
## S 4
##
##
## Annual Scientific Production
##
## Year Articles
## 2008 65
## 2009 92
## 2010 83
## 2011 79
## 2012 66
## 2013 38
## 2014 40
## 2015 27
## 2016 10
##
## Annual Percentage Growth Rate -20.86186
##
##
## Most Productive Authors
##
## Authors Articles Authors Articles Fractionalized
## 1 HORVATH S 20 HORVATH S 3.88
## 2 GESCHWIND DH 12 LEYDESDORFF L 2.33
## 3 LANGFELDER P 8 DEARING JW 2.00
## 4 MILLER JA 7 LANGFELDER P 1.92
## 5 HE Y 6 GESCHWIND DH 1.66
## 6 BORSBOOM D 5 BODIN O 1.50
## 7 COPPOLA G 5 BOSCHMA R 1.50
## 8 ZHANG B 5 DAWSON S 1.50
## 9 BASSETT DS 4 DING Y 1.50
## 10 BULLMORE ET 4 ERNSTSON H 1.33
## 11 CHO JH 4 INGOLD K 1.33
## 12 GAO FY 4 JORDAN F 1.25
## 13 KNIGHT R 4 BRANDES U 1.17
## 14 LEYDESDORFF L 4 BLUTHGEN N 1.14
## 15 MENON V 4 BORSBOOM D 1.13
## 16 MILL J 4 MILLER JA 1.13
## 17 OLDHAM MC 4 SCHENSUL JJ 1.09
## 18 OPHOFF RA 4 MENON V 1.07
## 19 SAITO K 4 HE Y 1.06
## 20 SMITH SM 4 ASHTON W 1.00
##
##
## Top manuscripts per citations
##
## Paper TC TCperYear
## 1 RUBINOV M, 2010, NEUROIMAGE 2848 356.0
## 2 LANGFELDER P, 2008, BMC BIOINFORMATICS 2152 215.2
## 3 SMITH SM, 2009, P NATL ACAD SCI USA 2004 222.7
## 4 JOSTINS L, 2012, NATURE 1790 298.3
## 5 BUCKNER RL, 2009, J NEUROSCI 1274 141.6
## 6 VOINEAGU I, 2011, NATURE 752 107.4
## 7 DELOUKAS P, 2013, NAT GENET 703 140.6
## 8 EAGLE N, 2009, P NATL ACAD SCI USA 682 75.8
## 9 CHEN J, 2009, NUCLEIC ACIDS RES 672 74.7
## 10 THIELE I, 2010, NAT PROTOC 601 75.1
## 11 FRANSSON P, 2008, NEUROIMAGE 572 57.2
## 12 SUPEKAR K, 2008, PLOS COMPUT BIOL 539 53.9
## 13 XUE J, 2014, IMMUNITY 531 132.8
## 14 FOWLER JH, 2008, BRIT MED J 503 50.3
## 15 MILL J, 2008, AM J HUM GENET 480 48.0
## 16 BAILEY P, 2016, NATURE 452 226.0
## 17 AIROLDI EM, 2008, J MACH LEARN RES 443 44.3
## 18 SUPEKAR K, 2009, PLOS BIOL 413 45.9
## 19 BARBERAN A, 2012, ISME J 383 63.8
## 20 GARDY JL, 2011, NEW ENGL J MED 369 52.7
##
##
## Most Productive Countries (of corresponding authors)
##
## Country Articles Freq SCP MCP MCP_Ratio
## 1 USA 228 0.45691 159 69 0.303
## 2 CHINA 35 0.07014 18 17 0.486
## 3 UNITED KINGDOM 34 0.06814 16 18 0.529
## 4 NETHERLANDS 27 0.05411 17 10 0.370
## 5 GERMANY 26 0.05210 14 12 0.462
## 6 CANADA 20 0.04008 9 11 0.550
## 7 ITALY 18 0.03607 7 11 0.611
## 8 AUSTRALIA 16 0.03206 6 10 0.625
## 9 SPAIN 11 0.02204 3 8 0.727
## 10 SWEDEN 11 0.02204 6 5 0.455
## 11 SWITZERLAND 10 0.02004 6 4 0.400
## 12 FRANCE 7 0.01403 4 3 0.429
## 13 KOREA 7 0.01403 4 3 0.429
## 14 JAPAN 6 0.01202 6 0 0.000
## 15 BELGIUM 5 0.01002 1 4 0.800
## 16 AUSTRIA 4 0.00802 2 2 0.500
## 17 IRELAND 4 0.00802 2 2 0.500
## 18 FINLAND 3 0.00601 1 2 0.667
## 19 GEORGIA 3 0.00601 3 0 0.000
## 20 BRAZIL 2 0.00401 0 2 1.000
##
##
## SCP: Single Country Publications
##
## MCP: Multiple Country Publications
##
##
## Total Citations per Country
##
## Country Total Citations Average Article Citations
## 1 USA 39031 171.2
## 2 UNITED KINGDOM 7023 206.6
## 3 CHINA 3819 109.1
## 4 CANADA 3440 172.0
## 5 GERMANY 3344 128.6
## 6 NETHERLANDS 3132 116.0
## 7 AUSTRALIA 2128 133.0
## 8 ITALY 2046 113.7
## 9 SWEDEN 1502 136.5
## 10 SPAIN 1265 115.0
## 11 SWITZERLAND 1141 114.1
## 12 JAPAN 1002 167.0
## 13 FRANCE 801 114.4
## 14 KOREA 735 105.0
## 15 IRELAND 650 162.5
## 16 AUSTRIA 540 135.0
## 17 GEORGIA 429 143.0
## 18 BELGIUM 389 77.8
## 19 GREECE 384 192.0
## 20 FINLAND 324 108.0
##
##
## Most Relevant Sources
##
## Sources Articles
## 1 PROCEEDINGS OF THE NATIONAL ACADEMY OF SCIENCES OF THE UNITED STATES OF AMERICA 25
## 2 PLOS ONE 22
## 3 NEUROIMAGE 15
## 4 NATURE 10
## 5 ISME JOURNAL 9
## 6 NUCLEIC ACIDS RESEARCH 9
## 7 CELL 7
## 8 GENOME RESEARCH 7
## 9 BIOINFORMATICS 6
## 10 BMC BIOINFORMATICS 6
## 11 PLOS GENETICS 6
## 12 BRAIN 5
## 13 CANCER RESEARCH 5
## 14 JOURNAL OF INFORMETRICS 5
## 15 MOLECULAR SYSTEMS BIOLOGY 5
## 16 BMC GENOMICS 4
## 17 DECISION SUPPORT SYSTEMS 4
## 18 EXPERT SYSTEMS WITH APPLICATIONS 4
## 19 JOURNAL OF NEUROSCIENCE 4
## 20 LANDSCAPE AND URBAN PLANNING 4
##
##
## Most Relevant Keywords
##
## Author Keywords (DE) Articles Keywords-Plus (ID) Articles
## 1 SOCIAL NETWORK ANALYSIS 43 NETWORK ANALYSIS 41
## 2 NETWORK ANALYSIS 41 EXPRESSION 32
## 3 GRAPH THEORY 14 GENE-EXPRESSION 29
## 4 SOCIAL NETWORKS 13 NETWORKS 26
## 5 SYSTEMS BIOLOGY 10 ORGANIZATION 25
## 6 FUNCTIONAL CONNECTIVITY 9 IDENTIFICATION 24
## 7 CONNECTIVITY 7 COMPLEX NETWORKS 22
## 8 FMRI 7 CENTRALITY 21
## 9 NETWORK 7 DISEASE 21
## 10 CENTRALITY 6 DYNAMICS 20
## 11 TRACTOGRAPHY 6 PATTERNS 17
## 12 CLUSTERING 5 ALZHEIMERS-DISEASE 16
## 13 MICROARRAY 5 EVOLUTION 16
## 14 NETWORKS 5 MODEL 16
## 15 COMMUNITY 4 COMMUNITY STRUCTURE 15
## 16 COMPLEX NETWORKS 4 ESCHERICHIA-COLI 15
## 17 DIFFUSION TENSOR IMAGING 4 FUNCTIONAL CONNECTIVITY 15
## 18 GENE EXPRESSION 4 PERFORMANCE 15
## 19 METABOLOMICS 4 BEHAVIOR 14
## 20 MICRORNA 4 MASS-SPECTROMETRY 14
Most cited references (internally)
# Tabulate the cited references of the collection at article level.
CR <- citations(M, field = "article", sep = ";")
# Display the first 20 entries of the tabulation as a one-column matrix.
cbind(CR$Cited[seq_len(20)])
## [,1]
## WASSERMAN S, 1994, SOCIAL NETWORK ANAL 63
## WATTS DJ, 1998, NATURE, V393, P440, DOI 101038/30918 49
## ZHANG B, 2005, STAT APPL GENET MO B, V4, DOI 102202/1544-61151128 47
## FREEMAN LC, 1979, SOC NETWORKS, V1, P215, DOI 101016/0378-8733(78)90021-7 42
## LANGFELDER P, 2008, BMC BIOINFORMATICS, V9, DOI 101186/1471-2105-9-559 37
## SHANNON P, 2003, GENOME RES, V13, P2498, DOI 101101/GR1239303 29
## OLDHAM MC, 2008, NAT NEUROSCI, V11, P1271, DOI 101038/NN2207 27
## FREEMAN LC, 1977, SOCIOMETRY, V40, P35, DOI 102307/3033543 26
## NEWMAN MEJ, 2003, SIAM REV, V45, P167, DOI 101137/S003614450342480 26
## GRANOVETTER MS, 1973, AM J SOCIOL, V78, P1360, DOI 101086/225469 25
## BARABASI AL, 1999, SCIENCE, V286, P509, DOI 101126/SCIENCE2865439509 23
## BULLMORE ET, 2009, NAT REV NEUROSCI, V10, P186, DOI 101038/NRN2575 23
## NEWMAN MEJ, 2006, P NATL ACAD SCI USA, V103, P8577, DOI 101073/PNAS0601602103 23
## ACHARD S, 2006, J NEUROSCI, V26, P63, DOI 101523/JNEUROSCI3874-052006 22
## HORVATH S, 2006, P NATL ACAD SCI USA, V103, P17402, DOI 101073/PNAS0608396103 22
## RAVASZ E, 2002, SCIENCE, V297, P1551, DOI 101126/SCIENCE1073374 22
## ACHARD S, 2007, PLOS COMPUT BIOL, V3, P174, DOI 101371/JOURNALPCBI0030017 21
## BENJAMINI Y, 1995, J ROY STAT SOC B MET, V57, P289 21
## GIRVAN M, 2002, P NATL ACAD SCI USA, V99, P7821, DOI 101073/PNAS122653799 21
## LANGFELDER P, 2008, BIOINFORMATICS, V24, P719, DOI 101093/BIOINFORMATICS/BTM563 21
# Co-citation matrix of the cited references.
NetMatrix <- biblioNetwork(M, analysis = "co-citation", network = "references", sep = ";")

# Plot 50 vertices with a Fruchterman-Reingold layout; `net` keeps the
# returned plot data for later inspection.
net <- networkPlot(
  NetMatrix,
  n = 50,
  Title = "Co-Citation Network",
  type = "fruchterman",
  size = 20,
  size.cex = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.7,
  edgesize = 10,
  edges.min = 5
)
Journal (Source) co-citation analysis
# Extract the cited sources into the CR_SO field, which the source-level
# co-citation network below is built from.
M <- metaTagExtraction(M, "CR_SO", sep = ";")

# Co-citation matrix of the cited sources.
NetMatrix <- biblioNetwork(M, analysis = "co-citation", network = "sources", sep = ";")

# Plot 50 vertices, letting networkPlot() choose the layout.
net <- networkPlot(
  NetMatrix,
  n = 50,
  Title = "Co-Citation Network",
  type = "auto",
  size = 15,
  size.cex = TRUE,
  remove.multiple = FALSE,
  labelsize = 0.7,
  edgesize = 10,
  edges.min = 5
)
By the way, the results contain an igraph object
# Print the igraph object stored in the networkPlot() result.
net[["graph"]]
## IGRAPH 03742da UN-- 50 18378 --
## + attr: name (v/c), deg (v/n), size (v/n), label.cex (v/n), color (v/c), community (v/n), color (e/c), num (e/n),
## | width (e/n)
## + edges from 03742da (vertex names):
## [1] J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL
## [5] J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL
## [9] J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL
## [13] J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL
## [17] J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL
## [21] J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL
## [25] J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL J NEUROSCI--PLOS COMPUT BIOL
## + ... omitted several edges
Statistics
#netstat <- networkStat(NetMatrix)
#summary(netstat, k = 10)
Historical
# Build the historical citation network, using the 75th percentile of the
# total-citation column (TC) as the min.citations threshold.
histResults <- histNetwork(M, min.citations = quantile(M$TC, 0.75), sep = ";")

# Draw the historiograph for 20 vertices.
net <- histPlot(
  histResults,
  n = 20,
  size = 5,
  size.cex = TRUE,
  labelsize = 3,
  arrowsize = 0.5
)
##
## Legend
##
## Paper DOI Year LCS GCS
## 2008 - 1 LANGFELDER P, 2008, BMC BIOINFORMATICS 10.1186/1471-2105-9-559 2008 37 2152
## 2008 - 3 SUPEKAR K, 2008, PLOS COMPUT BIOL 10.1371/JOURNAL.PCBI.1000100 2008 9 539
## 2008 - 8 HORVATH S, 2008, PLOS COMPUT BIOL 10.1371/JOURNAL.PCBI.1000117 2008 15 299
## 2008 - 14 MILLER JA, 2008, J NEUROSCI 10.1523/JNEUROSCI.4098-07.2008 2008 8 224
## 2009 - 22 SMITH SM, 2009, P NATL ACAD SCI USA 10.1073/PNAS.0905267106 2009 3 2004
## 2009 - 23 BUCKNER RL, 2009, J NEUROSCI 10.1523/JNEUROSCI.5062-08.2009 2009 9 1274
## 2009 - 26 SUPEKAR K, 2009, PLOS BIOL 10.1371/JOURNAL.PBIO.1000157 2009 5 413
## 2009 - 27 HE Y, 2009, PLOS ONE 10.1371/JOURNAL.PONE.0005226 2009 7 314
## 2009 - 35 PRELL C, 2009, SOC NATUR RESOUR 10.1080/08941920802199202 2009 3 231
## 2009 - 38 KONOPKA G, 2009, NATURE 10.1038/NATURE08549 2009 3 213
## 2009 - 44 BORGATTI SP, 2009, J SUPPLY CHAIN MANAG 10.1111/J.1745-493X.2009.03166.X 2009 3 185
## 2009 - 47 THEOCHARIDIS A, 2009, NAT PROTOC 10.1038/NPROT.2009.177 2009 3 171
## 2010 - 55 RUBINOV M, 2010, NEUROIMAGE 10.1016/J.NEUROIMAGE.2009.10.003 2010 18 2848
## 2010 - 59 HE Y, 2010, CURR OPIN NEUROL 10.1097/WCO.0B013E32833AA567 2010 4 287
## 2010 - 60 SKUDLARSKI P, 2010, BIOL PSYCHIAT 10.1016/J.BIOPSYCH.2010.03.035 2010 3 242
## 2010 - 66 MILLER JA, 2010, P NATL ACAD SCI USA 10.1073/PNAS.0914257107 2010 10 194
## 2011 - 75 VOINEAGU I, 2011, NATURE 10.1038/NATURE10110 2011 9 752
## 2011 - 83 BASSETT DS, 2011, NEUROIMAGE 10.1016/J.NEUROIMAGE.2010.09.006 2011 4 183
## 2012 - 90 BARBERAN A, 2012, ISME J 10.1038/ISMEJ.2011.119 2012 4 383
## 2012 - 98 BASSETT DS, 2012, NEUROIMAGE 10.1016/J.NEUROIMAGE.2011.10.002 2012 3 166
## 2013 - 104 BORSBOOM D, 2013, ANNU REV CLIN PSYCHO 10.1146/ANNUREV-CLINPSY-050212-185608 2013 3 355
## 2013 - 107 BREUER K, 2013, NUCLEIC ACIDS RES 10.1093/NAR/GKS1147 2013 3 216
The conceptual structure - Co-Word Analysis
Co-word networks show the conceptual structure, which uncovers links between concepts through term co-occurrences.
Conceptual structure is often used to understand the topics covered by scholars (so-called research front) and identify what are the most important and the most recent issues.
Dividing the whole timespan in different timeslices and comparing the conceptual structures is useful to analyze the evolution of topics over time.
Bibliometrix is able to analyze keywords, but also the terms in the articles’ titles and abstracts. It does so using network analysis, correspondence analysis (CA), or multiple correspondence analysis (MCA). CA and MCA visualise the conceptual structure in a two-dimensional plot.
Plot options:
# Keyword co-occurrence matrix.
NetMatrix <- biblioNetwork(M, analysis = "co-occurrences", network = "keywords", sep = ";")

# Plot 50 vertices with association-strength normalisation and a
# Fruchterman-Reingold layout.
net <- networkPlot(
  NetMatrix,
  normalize = "association",
  n = 50,
  Title = "Keyword Co-occurrences",
  type = "fruchterman",
  size = 20,
  size.cex = TRUE,
  remove.multiple = FALSE,
  edgesize = 10,
  labelsize = 3,
  label.cex = TRUE,
  label.n = 50,
  edges.min = 2
)
Correspondence analysis (CA; use method = "MCA" for multiple correspondence analysis)
# Map the conceptual structure of the ID keyword field with correspondence
# analysis. `stemming` must be the logical FALSE: the original passed the
# undefined symbol `f`, which fails with "object 'f' not found" as soon as
# the argument is evaluated.
CS <- conceptualStructure(M,
                          method = "CA",
                          field = "ID",
                          minDegree = 10,
                          k.max = 8,
                          stemming = FALSE,
                          labelsize = 8,
                          documents = 20)
Co-word analysis draws clusters of keywords. They are considered as themes, whose density and centrality can be used in classifying themes and mapping in a two-dimensional diagram.
Thematic map is a very intuitive plot and we can analyze themes according to the quadrant in which they are placed: (1) upper-right quadrant: motor-themes; (2) lower-right quadrant: basic themes; (3) lower-left quadrant: emerging or disappearing themes; (4) upper-left quadrant: very specialized/niche themes.
Please see Cobo, M. J., López-Herrera, A. G., Herrera-Viedma, E., & Herrera, F. (2011). An approach for detecting, quantifying, and visualizing the evolution of a research field: A practical application to the fuzzy sets theory field. Journal of Informetrics, 5(1), 146-166.
# Keyword co-occurrence matrix and its association-normalised counterpart.
NetMatrix <- biblioNetwork(M, analysis = "co-occurrences", network = "keywords", sep = ";")
S <- normalizeSimilarity(NetMatrix, type = "association")

# Cluster the normalised network with walktrap on up to 500 vertices; the
# result feeds the thematic map below.
net <- networkPlot(
  S,
  n = 500,
  Title = "Keyword co-occurrences",
  type = "fruchterman",
  cluster = "walktrap",
  weighted = TRUE,
  noloops = TRUE,
  remove.isolates = FALSE,
  remove.multiple = FALSE,
  halo = FALSE,
  size = 1,
  edgesize = 5,
  edges.min = 2,
  labelsize = 2,
  label.cex = TRUE
)

# Build the thematic map from the clustered network and draw it.
Map <- thematicMap(net, NetMatrix, S = S, minfreq = 5)
plot(Map$map)
Cluster description
# Order the thematic-map words by cluster, most frequent first.
clusters <- arrange(Map$words, Cluster, desc(Occurrences))

# For every cluster, show its three leading words together with each word's
# share of the cluster's total occurrences.
clusters %>%
  select(Cluster, Words, Occurrences) %>%
  group_by(Cluster) %>%
  mutate(n.rel = Occurrences / sum(Occurrences)) %>%
  slice(1:3)